{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 本代码是探究指定时间进入指定地点的人群"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 思路\n",
    "# 打算使用字典的方式存储，按每天的每个时间段进行分类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 导入模块"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "from os import path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings\n",
    "\n",
    "warnings.filterwarnings(action='ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 定义函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_data(file_name):\n",
    "    '''\n",
    "    传入的是文件路径\n",
    "    读取并对数据进行处理\n",
    "    '''\n",
    "    file_extension = path.splitext(file_name)[1]\n",
    "    if file_extension == '.csv':\n",
    "        data = pd.read_csv(file_name, encoding='gbk')\n",
    "    elif file_extension == '.xlsx':\n",
    "        data = pd.read_excel(file_name, encoding='gbk')\n",
    "    drop_col = ['imei_', 'area_', 'msisdn_']\n",
    "    data.drop(drop_col, axis=1, inplace=True)\n",
    "    col = data.columns\n",
    "    for each in col:\n",
    "        if data[each].dtype == object:\n",
    "            data[each] = data[each].str.replace('\\t', '')\n",
    "    print('data load successfully')\n",
    "    return data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_cycle(data1):\n",
    "    data1.index = pd.to_datetime(data.update_time_)\n",
    "    # 数据存储\n",
    "    # 刚才似乎每考虑小时\n",
    "\n",
    "    d = {}\n",
    "    for i, j in data1.groupby([\n",
    "        data1.index.day, data1.index.hour, 'device_id_'\n",
    "    ]).imsi_: # 可以发现，这样编写可以找出每天某时间，出现的用户\n",
    "        day = i[0]\n",
    "        hour = i[1]\n",
    "        device = i[2]\n",
    "        if d.get((day, hour)) == None:\n",
    "            d[(day, hour)] = {}\n",
    "        d[(day, hour)].update({device:set(j.to_list())})\n",
    "        \n",
    "    # 开始集合运算\n",
    "    res = {}\n",
    "    flag = 1\n",
    "    device = ['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001']\n",
    "    for i in device:\n",
    "        for day in d.keys():\n",
    "            if flag:\n",
    "                base = d[day][i]\n",
    "                flag = 0\n",
    "            base &= d[day][i]\n",
    "            if res.get(day) == None:\n",
    "                res[day] = {}\n",
    "            res[day].update({i:base})\n",
    "        flag = 1\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "def main():\n",
    "    os.listdir('../Project Data/')\n",
    "    os.chdir('../Project Data/') # 切换到数据所在目录\n",
    "    data = get_data('20190524-001.csv')\n",
    "    return find_cycle(data)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 运行代码"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "res = main()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 代码调试"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "os.listdir('../Project Data/')\n",
    "\n",
    "os.chdir('../Project Data/') # 切换到数据所在目录"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "data load successfully\n"
     ]
    }
   ],
   "source": [
    "data = get_data('20190524-001.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>imsi_</th>\n",
       "      <th>update_time_</th>\n",
       "      <th>lac_</th>\n",
       "      <th>cellid_</th>\n",
       "      <th>status_</th>\n",
       "      <th>ap_type_</th>\n",
       "      <th>device_id_</th>\n",
       "      <th>eventid_</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>460110413044304</td>\n",
       "      <td>2019-05-08 20:09:29</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>460110413042577</td>\n",
       "      <td>2019-05-08 20:09:41</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>460110425023300</td>\n",
       "      <td>2019-05-08 20:10:14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>460110714141865</td>\n",
       "      <td>2019-05-08 20:10:40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>460016692088409</td>\n",
       "      <td>2019-05-08 20:08:55</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>LT-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             imsi_         update_time_  lac_  cellid_ status_ ap_type_  \\\n",
       "0  460110413044304  2019-05-08 20:09:29     0        0      拒绝  FDD-LTE   \n",
       "1  460110413042577  2019-05-08 20:09:41     0        0      拒绝  FDD-LTE   \n",
       "2  460110425023300  2019-05-08 20:10:14     0        0      拒绝  FDD-LTE   \n",
       "3  460110714141865  2019-05-08 20:10:40     0        0      拒绝  FDD-LTE   \n",
       "4  460016692088409  2019-05-08 20:08:55     0        0      拒绝  FDD-LTE   \n",
       "\n",
       "   device_id_    eventid_  \n",
       "0  DX-SZSC001  Normal Lau  \n",
       "1  DX-SZSC001  Normal Lau  \n",
       "2  DX-SZSC001  Normal Lau  \n",
       "3  DX-SZSC001  Normal Lau  \n",
       "4  LT-SZSC001  Normal Lau  "
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "data1 = data.copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "data1.index = pd.to_datetime(data.update_time_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>imsi_</th>\n",
       "      <th>update_time_</th>\n",
       "      <th>lac_</th>\n",
       "      <th>cellid_</th>\n",
       "      <th>status_</th>\n",
       "      <th>ap_type_</th>\n",
       "      <th>device_id_</th>\n",
       "      <th>eventid_</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>update_time_</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2019-05-08 20:09:29</th>\n",
       "      <td>460110413044304</td>\n",
       "      <td>2019-05-08 20:09:29</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-05-08 20:09:41</th>\n",
       "      <td>460110413042577</td>\n",
       "      <td>2019-05-08 20:09:41</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-05-08 20:10:14</th>\n",
       "      <td>460110425023300</td>\n",
       "      <td>2019-05-08 20:10:14</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-05-08 20:10:40</th>\n",
       "      <td>460110714141865</td>\n",
       "      <td>2019-05-08 20:10:40</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>DX-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2019-05-08 20:08:55</th>\n",
       "      <td>460016692088409</td>\n",
       "      <td>2019-05-08 20:08:55</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>拒绝</td>\n",
       "      <td>FDD-LTE</td>\n",
       "      <td>LT-SZSC001</td>\n",
       "      <td>Normal Lau</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                               imsi_         update_time_  lac_  cellid_  \\\n",
       "update_time_                                                               \n",
       "2019-05-08 20:09:29  460110413044304  2019-05-08 20:09:29     0        0   \n",
       "2019-05-08 20:09:41  460110413042577  2019-05-08 20:09:41     0        0   \n",
       "2019-05-08 20:10:14  460110425023300  2019-05-08 20:10:14     0        0   \n",
       "2019-05-08 20:10:40  460110714141865  2019-05-08 20:10:40     0        0   \n",
       "2019-05-08 20:08:55  460016692088409  2019-05-08 20:08:55     0        0   \n",
       "\n",
       "                    status_ ap_type_  device_id_    eventid_  \n",
       "update_time_                                                  \n",
       "2019-05-08 20:09:29      拒绝  FDD-LTE  DX-SZSC001  Normal Lau  \n",
       "2019-05-08 20:09:41      拒绝  FDD-LTE  DX-SZSC001  Normal Lau  \n",
       "2019-05-08 20:10:14      拒绝  FDD-LTE  DX-SZSC001  Normal Lau  \n",
       "2019-05-08 20:10:40      拒绝  FDD-LTE  DX-SZSC001  Normal Lau  \n",
       "2019-05-08 20:08:55      拒绝  FDD-LTE  LT-SZSC001  Normal Lau  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "LT-SZSC001    19548\n",
       "YD-SZSC001    17894\n",
       "DX-SZSC001     5975\n",
       "Name: device_id_, dtype: int64"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 选取某个时间段的数据\n",
    "data1.loc[time(8,0):time(10, 0)].device_id_.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 数据存储\n",
    "d = {}\n",
    "for i, j in data1.groupby([\n",
    "    data1.index.day, 'device_id_'\n",
    "]).imsi_: # 可以发现，这样编写可以找出每天某时间，出现的用户\n",
    "    day = i[0]\n",
    "    device = i[1]\n",
    "    if d.get(day) == None: # 使用字典的get方法，如果元素不存在，则返回None\n",
    "        d[day] = {}\n",
    "    d[day].update({device:set(j)}) # 为什么叠加不了"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "d.get(2) == None #"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n",
      "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])\n"
     ]
    }
   ],
   "source": [
    "for i in range(8, 25):\n",
    "    print(d[i].keys())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 开始集合运算\n",
    "\n",
    "res = {}\n",
    "flag = 1\n",
    "device = ['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001']\n",
    "for i in device:\n",
    "    for day in d.keys():\n",
    "        if flag:\n",
    "            base = d[day][i]\n",
    "            flag = 0\n",
    "        base &= d[day][i]\n",
    "        if res.get(day) == None:\n",
    "            res[day] = {}\n",
    "        res[day].update({i:base})\n",
    "    flag = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 刚才似乎每考虑小时\n",
    "# 数据存储\n",
    "d = {}\n",
    "for i, j in data1.groupby([\n",
    "    data1.index.day, data1.index.hour, 'device_id_'\n",
    "]).imsi_: # 可以发现，这样编写可以找出每天某时间，出现的用户\n",
    "    day = i[0]\n",
    "    hour = i[1]\n",
    "    device = i[2]\n",
    "    if d.get((day, hour)) == None:\n",
    "        d[(day, hour)] = {}\n",
    "    d[(day, hour)].update({device:set(j.to_list())})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dict_keys(['DX-SZSC001', 'LT-SZSC001', 'YD-SZSC001'])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 开始定位，某个时间段出现什么人\n",
    "d[(8, 20)].keys()# 时间区间，表示9号，10号，11号"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "用户：{460110267853698, 460000211720632, 460002406691111, 460029571752973, 460110412995758, 460014926623352, 460004202121725} 在这个月的 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24 天的 8-10 点出现了\n"
     ]
    }
   ],
   "source": [
    "res = None\n",
    "day = ''\n",
    "for i, j in d.items():\n",
    "    if not res:\n",
    "        day = ','.join([day, str(i)])\n",
    "        res = j\n",
    "    else:\n",
    "        day = ','.join([day, str(i)])\n",
    "        res &= j\n",
    "print(f'用户：{res} 在这个月的 {day[1:]} 天的 8-10 点出现了')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1,'"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s = ''\n",
    "','.join(['1', s])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
